This page presents visualization of the residual oil project.

EDA and visualization

Air pollution: PM2.5 and SO2

The data comes from NYCCAS. I am comparing data in 2011, which is the year before implementation of the Clean Heat Program, and year 2016, which is the year by which all heating oil #6 should be converted.

The pollutants of interest are PM2.5 and SO2. Here I imported the data and plotted the spatial distribution in 2011 and 2016 and the changes for these two pollutants.

# pm in 2011
pm_12_plot <- ggplot(tract_plot) +
  aes(long, lat, group = group, fill=cut_number(m__2012, 5)) +
  geom_polygon() +
  coord_equal() + 
  scale_fill_brewer(name="Quantile", palette="Blues") +
  labs(
    title = 'PM2.5, 2011, ug/m3',
    x = '',
    y = ''
  ) +
  north(tract_plot) +
  theme_bw() + theme(legend.position = 'none')

# pm in 2016
pm_16_plot <- ggplot(tract_plot) +
  aes(long, lat, group=group, fill=cut_number(m__2016, 5)) +
  geom_polygon() +
  coord_equal() +
  scale_fill_brewer(name="Quantile", palette="Blues") +
  labs(
    title = str_c('PM2.5, 2016, ug/m3'),
    x = '',
    y = ''
  ) +
  north(tract_plot) +
  theme_bw() + theme(legend.position = 'none')

# reduction in pm
pm_diff_plot <- ggplot(tract_plot) +
  aes(long, lat, group=group, fill=cut_number(pm_diff, 5)) +
  geom_polygon() +
  coord_equal() +
  scale_fill_brewer(name="Quantile", palette="Blues") +
  labs(
    title = str_c('PM2.5 (2011-2016),ug/m3'),
    x = '',
    y = ''
  ) +
  north(tract_plot) +
  theme_bw() + theme(legend.position = 'none')

# relative difference
pm_rdiff_plot <- ggplot(tract_plot) +
  aes(long, lat, group=group, fill=cut_number(pm_rdff, 5)) +
  geom_polygon() +
  coord_equal() +
  scale_fill_brewer(name="Quantile", palette="Blues") +
  labs(
    title = str_c('PM2.5 (2011-2016), relative'),
    x = '',
    y = '',
    caption = 'Data source: NYCCAS'
  ) +
  north(tract_plot) +
  theme_bw() + theme(legend.position = 'none')
# combine
(pm_12_plot + pm_16_plot)/(pm_diff_plot + pm_rdiff_plot)

PM2.5

popup_pm <- str_c('<b>GEOID: </b>', nyc_tract$geoid, '<br>',
               '<b>PM2.5 change: </b>', round(nyc_tract$PM2.5_change, digits = 2))

pal_pm <- colorQuantile('Greens', NULL, n = 5)
leaflet() %>% 
  addProviderTiles(providers$CartoDB.Positron) %>% 
  addPolygons(data = nyc_tract,
              fillColor = ~pal_pm(PM2.5_change),
              color = 'grey',
              weight = 1,
              fillOpacity = 0.8,
              popup = popup_pm,
              highlight = highlightOptions(
    weight = 4,
    color = "#666",
    fillOpacity = 0.3,
    bringToFront = TRUE)) %>% 
   addLegend(data = nyc_tract,
     pal = pal_pm, 
             values = ~nyc_tract$PM2.5_change, opacity = 0.7, title = NULL,
    position = "bottomright")

SO2

I made a similar plot to show the changes in SO2 from 2012 to 2016. In fact, reduction.

popup_so2 <- str_c('<b>GEOID: </b>', nyc_tract$geoid, '<br>',
               '<b>SO2 change: </b>', round(nyc_tract$SO2_change, digits = 2))

pal_so2 <- colorQuantile('Blues', NULL, n = 5)
leaflet() %>% 
  addProviderTiles(providers$CartoDB.Positron) %>% 
  addPolygons(data = nyc_tract,
              fillColor = ~pal_so2(SO2_change),
              color = 'grey',
              weight = 1,
              fillOpacity = 0.8,
              popup = popup_so2,
              highlight = highlightOptions(
    weight = 4,
    color = "#666",
    fillOpacity = 0.3,
    bringToFront = TRUE)) %>% 
   addLegend(data = nyc_tract,
     pal = pal_so2, 
             values = ~nyc_tract$SO2_change, opacity = 0.7, title = NULL,
    position = "bottomright")

These plots show that:

  • In general, PM2.5 has reduced across the city, as showed in the quantile legends.
  • in both 2011 and 2016, PM2.5 is very clustered in Manhattan, South Bronx and Part of Queens.
  • Reduction of air pollution is clustered in Northern Manhattan and the Bronx.
# pm in 2011
so2_12_plot <- ggplot(tract_plot) +
  aes(long, lat, group = group, fill=cut_number(m_2_2012, 5)) +
  geom_polygon() +
  coord_equal() + 
  scale_fill_brewer(name="Quantile", palette="Greens") +
  labs(
    title = 'SO2, 2011',
    x = '',
    y = ''
  ) +
  north(tract_plot) +
  theme_bw() + theme(legend.position = 'none') +
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank()
        )

# pm in 2016
so2_16_plot <- ggplot(tract_plot) +
  aes(long, lat, group=group, fill=cut_number(m_2_2016, 5)) +
  geom_polygon() +
  coord_equal() +
  scale_fill_brewer(name="Quantile", palette="Greens") +
  labs(
    title = 'SO2, 2016',
    x = '',
    y = ''
  ) +
  north(tract_plot) +
  theme_bw() + theme(legend.position = 'none') +
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank()
        )

so2_diff_plot <- ggplot(tract_plot) +
  aes(long, lat, group = group, fill=cut_number(so2_dff, 5)) +
  geom_polygon() +
  coord_equal() + 
  scale_fill_brewer(name="Quantile", palette="Greens") +
  labs(
    title = 'Reduction in SO2, 2011-2016',
    x = '',
    y = ''
  ) +
  north(tract_plot) +
  theme_bw() + theme(legend.position = 'none') +
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank()
        )


# reduction in pms
so2_rdiff_plot <- ggplot(tract_plot) +
  aes(long, lat, group=group, fill=cut_number(s2_rdff, 5)) +
  geom_polygon() +
  coord_equal() +
  scale_fill_brewer(name="Quantile", palette="Greens") +
  labs(
    title = 'Reduction in SO2, relative',
    x = '',
    y = '',
    caption = 'Data source: NYCCAS'
  ) +
  north(tract_plot) +
  theme_bw() + theme(legend.position = 'none') + 
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank()
        )
# combine
(so2_12_plot + so2_16_plot)/(so2_diff_plot + so2_rdiff_plot)
ro2_breaks <- getJenksBreaks(nyc_tract@data$d_ro2, 5)
ro2_change <- ggplot(tract_plot) +
  aes(long, lat, group = group, fill=cut(d_ro2, breaks = c(-6, -1, 0, 1, 6, 40, 106, 159), include.lowest = TRUE)) +
  geom_polygon() +
  coord_equal() +
  scale_fill_brewer(name="Quantile", palette = "RdBu", direction = -1) +
  labs(
    title = 'Changes in fuel oil #2'
  ) +
  north(tract_plot) +
  theme_bw() +
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank()
        )

ro4_breaks <- getJenksBreaks(nyc_tract@data$d_ro4, 5)
ro4_change <- ggplot(tract_plot) +
  aes(long, lat, group = group, fill=cut(d_ro2, breaks = c(-31,-1, 0, 14, 46, 137, 305), include.lowest = TRUE)) +
  geom_polygon() +
  coord_equal() +
  scale_fill_brewer(name="Quantile", palette = "RdBu", direction = -1) +
  labs(
    title = 'Changes in fuel oil #4'
  ) +
  north(tract_plot) +
  theme_bw() +
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank()
        )
  
ro6_breaks <- getJenksBreaks(nyc_tract@data$d_ro6, 5)
ro6_change <- ggplot(tract_plot) +
  aes(long, lat, group = group, fill = cut(d_ro6, breaks = c(-69, -1, 0, 6, 47, 130, 248), include.lowest = TRUE)) +
  geom_polygon() +
  coord_equal() +
  scale_fill_brewer(name="Quantile", palette = "RdBu", direction = -1) +
  labs(
    title = 'Changes in fuel oil #6'a
  ) +
  north(tract_plot) +
  theme_bw() +
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank()
        )


ng_breaks <- getJenksBreaks(nyc_tract@data$d_ng, 5)
ng_change <- ggplot(tract_plot) +
  aes(long, lat, group = group, fill = cut(d_ng, breaks = c(-482, -13, -1, 0,  138))) +
  geom_polygon() +
  coord_equal() +
  scale_fill_brewer(name="Quantile", palette = "RdBu", direction = -1) +
  labs(
    title = 'Changes in natural gas'
  ) +
  north(tract_plot) +
  theme_bw() +
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank()
        )

Analysis

Linear regression

To make things easier for my analysis, I will use the subsetted dataset (2158 obs).

model_tract_sp <- st_read(dsn = '/Users/zhanglvou/Desktop/GoMailman/research/number_boilers/fuel_data/data/so2_abs_lag', layer = 'model_so2_abs_lag') %>% 
  filter(!is.na(so2_dff) & !is.na(s2_rdff) & !is.na(avg_yer)) %>% 
  as_Spatial(.)
## Reading layer `model_so2_abs_lag' from data source `/Users/zhanglvou/Desktop/GoMailman/research/number_boilers/fuel_data/data/so2_abs_lag' using driver `ESRI Shapefile'
## Simple feature collection with 2166 features and 41 fields
## geometry type:  POLYGON
## dimension:      XY
## bbox:           xmin: -74.25909 ymin: 40.4774 xmax: -73.70001 ymax: 40.91758
## epsg (SRID):    NA
## proj4string:    +proj=longlat +ellps=GRS80 +no_defs

SO2

model_tract_data <- model_tract_sp@data
lm_so2 <- lm(so2_dff ~ d_ro2 + d_ro4 + d_ro6 + d_ng + d_d2 + bus + hvytrk + medtrk + car + avg_yer, data = model_tract_data)

check the coefficients of the linear regression model

lm_so2 %>% 
  broom::tidy()
## # A tibble: 11 x 5
##    term        estimate    std.error statistic  p.value
##    <chr>          <dbl>        <dbl>     <dbl>    <dbl>
##  1 (Intercept)  7.37e+1 5.20            14.2   1.21e-43
##  2 d_ro2       -5.96e-3 0.0103          -0.579 5.63e- 1
##  3 d_ro4       -3.06e-2 0.00649         -4.72  2.50e- 6
##  4 d_ro6        6.18e-2 0.00641          9.65  1.37e-21
##  5 d_ng        -9.22e-3 0.00275         -3.36  7.95e- 4
##  6 d_d2         1.80e-2 0.0814           0.221 8.25e- 1
##  7 bus          9.22e-6 0.00000136       6.79  1.49e-11
##  8 hvytrk       3.74e-6 0.000000921      4.06  5.00e- 5
##  9 medtrk      -2.13e-6 0.000000799     -2.66  7.76e- 3
## 10 car         -5.21e-8 0.0000000119    -4.38  1.24e- 5
## 11 avg_yer     -3.60e-2 0.00269        -13.4   2.61e-39
lm_so2 %>% 
  broom::glance()
## # A tibble: 1 x 11
##   r.squared adj.r.squared sigma statistic  p.value    df logLik   AIC   BIC
##       <dbl>         <dbl> <dbl>     <dbl>    <dbl> <int>  <dbl> <dbl> <dbl>
## 1     0.177         0.173  1.97      46.2 9.36e-84    11 -4518. 9061. 9129.
## # … with 2 more variables: deviance <dbl>, df.residual <int>

R2 is 0.177.

plot the residuals. I expect them to be highly correlated.

model_tract_sp@data <- add_residuals(model_tract_sp@data, lm_so2)

model_tract_sp@data$id <- rownames(model_tract_sp@data)
gpclibPermit()
## [1] TRUE
model_tract_sp.fort <- broom::tidy(model_tract_sp, region = "id")
model_tract_sp_plot <- plyr::join(model_tract_sp.fort, model_tract_sp@data, by = "id")
ggplot(model_tract_sp_plot) +
  aes(long, lat, group = group, fill = cut(model_tract_sp_plot$resid, breaks = c(-11, -1.58, -1.01, 0, 0.945, 9))) +
  geom_polygon() +
  coord_equal() +
  scale_fill_brewer(name="Quantile", palette = "RdBu", direction = -1) +
  labs(
    title = 'Changes in natural gas'
  ) +
  north(model_tract_sp_plot) +
  theme_bw() +
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank()
        )

It’s very clustered

Spatial Durbin

To run the durbin model, I need to exclude some missing rows in the dataset

neighbor <- poly2nb(model_tract_sp)
list_weight <- nb2listw(neighbor)
durbin_so2 <- lagsarlm(so2_dff ~ d_ro2 + d_ro4 + d_ro6 + d_ng + d_d2, data = model_tract_sp, Durbin = TRUE, list_weight)

Some neighbors and list of weights

model_tract_sp@data$d_so2_resid <- residuals(durbin_so2)
popup_dso2_resid <- str_c('<b>GEOID: </b>', model_tract_sp$geoid, '<br>',
               '<b>Residual of spatial durbin: </b>', round(model_tract_sp$d_so2_resid, digits = 2))

pal_dso2_resid <- colorQuantile('RdBu', NULL, n = 5, reverse = TRUE)

leaflet() %>% 
  addProviderTiles(providers$CartoDB.Positron) %>% 
  addPolygons(data = model_tract_sp,
              fillColor = ~pal_dso2_resid(model_tract_sp$d_so2_resid),
              color = 'grey',
              weight = 1,
              fillOpacity = 0.8,
              popup = popup_dso2_resid,
              highlight = highlightOptions(
    weight = 4,
    color = "#666",
    fillOpacity = 0.3,
    bringToFront = TRUE))
moran_I <- function(x, y = NULL, W){
  if(is.null(y)) y = x
  
  xp <- (x - mean(x, na.rm=T))/sd(x, na.rm=T)
  yp <- (y - mean(y, na.rm=T))/sd(y, na.rm=T)
  W[which(is.na(W))] <- 0
  n <- nrow(W)
  
  global <- (xp%*%W%*%yp)/(n - 1)
  local  <- (xp*W%*%yp)
  
  list(global = global, local  = as.numeric(local))
}

simula_moran <- function(x, y = NULL, W, nsims = 1000){
  
  if(is.null(y)) y = x
  
  n   = nrow(W)
  IDs = 1:n
  
  xp <- (x - mean(x, na.rm=T))/sd(x, na.rm=T)
  W[which(is.na(W))] <- 0
  
  global_sims = NULL
  local_sims  = matrix(NA, nrow = n, ncol=nsims)
  
  ID_sample = sample(IDs, size = n*nsims, replace = T)
  
  y_s = y[ID_sample]
  y_s = matrix(y_s, nrow = n, ncol = nsims)
  y_s <- (y_s - apply(y_s, 1, mean))/apply(y_s, 1, sd)
  
  global_sims  <- as.numeric( (xp%*%W%*%y_s)/(n - 1) )
  local_sims  <- (xp*W%*%y_s)
  
  list(global_sims = global_sims,
       local_sims  = local_sims)
}

GWR

bw_so2 <- gwr.sel(so2_dff ~ d_ro2 + d_ro4 + d_ro6 + d_ng + d_d2 + bus + hvytrk + medtrk + car + avg_yer, data = model_tract_sp, gweight = gwr.Gauss, verbose = TRUE)
## Bandwidth: 24.79522 CV score: 9253.755 
## Bandwidth: 40.07945 CV score: 10095.97 
## Bandwidth: 15.34904 CV score: 7388.562 
## Bandwidth: 9.510986 CV score: 4744.764 
## Bandwidth: 5.902868 CV score: 3202.626 
## Bandwidth: 3.672928 CV score: 2749.705 
## Bandwidth: 2.144625 CV score: 2565.494 
## Bandwidth: 1.350206 CV score: 2416.616 
## Bandwidth: 0.8592285 CV score: 2426.233 
## Bandwidth: 1.16553 CV score: 2389.564 
## Bandwidth: 1.122779 CV score: 2389.252 
## Bandwidth: 1.138186 CV score: 2388.732 
## Bandwidth: 1.141723 CV score: 2388.717 
## Bandwidth: 1.141229 CV score: 2388.717 
## Bandwidth: 1.14127 CV score: 2388.717 
## Bandwidth: 1.141311 CV score: 2388.717 
## Bandwidth: 1.14127 CV score: 2388.717
gwr_so2 <- gwr(so2_dff ~ d_ro2 + d_ro4 + d_ro6 + d_ng + d_d2 + bus + hvytrk + medtrk + car + avg_yer, data = model_tract_sp, bandwidth = bw_so2, gweight = gwr.Gauss)
model_tract_sp@data$gwr_r2 <- gwr_so2$SDF$localR2
model_tract_sp@data$ro2_coeff <- gwr_so2$SDF$d_ro2
model_tract_sp@data$ro4_coeff <- gwr_so2$SDF$d_ro4
model_tract_sp@data$ng_coeff <- gwr_so2$SDF$d_ng
model_tract_sp@data$d2_coeff <- gwr_so2$SDF$d_d2
model_tract_sp@data$ro4_coeff <- gwr_so2$SDF$d_ro4
model_tract_sp@data$gwr_residual <- gwr_so2$SDF$gwr.e
popup_localr2 <- str_c('<b>GEOID: </b>', model_tract_sp$geoid, '<br>',
               '<b>Local R2: </b>', round(gwr_so2$SDF$localR2, digits = 2))

pal <- colorQuantile('Greens', NULL, n = 5)

leaflet() %>% 
  addProviderTiles(providers$CartoDB.Positron) %>% 
  addPolygons(data = gwr_so2$SDF,
              fillColor = ~pal(gwr_so2$SDF$localR2),
              color = 'grey',
              weight = 1,
              fillOpacity = 0.8,
              popup = popup_localr2,
              highlight = highlightOptions(
    weight = 4,
    color = "#666",
    fillOpacity = 0.3,
    bringToFront = TRUE))
popup_ro6 <- str_c('<b>GEOID: </b>', model_tract_sp$geoid, '<br>',
               '<b>Coefficient of #RO6 : </b>', round(gwr_so2$SDF$d_ro6, digits = 2))

leaflet() %>% 
  addProviderTiles(providers$CartoDB.Positron) %>% 
  addPolygons(data = gwr_so2$SDF,
              fillColor = ~pal(gwr_so2$SDF$d_ro6),
              color = 'grey',
              weight = 1,
              fillOpacity = 0.8,
              popup = popup_ro6,
              highlight = highlightOptions(
    weight = 4,
    color = "#666",
    fillOpacity = 0.3,
    bringToFront = TRUE))